import json
import re

import scrapy
from scrapy import Request


class A51jobSpider(scrapy.Spider):
    """Scrape python job listings from 51job.com search-result pages.

    The search page embeds its results as a JSON blob assigned to
    ``window.__SEARCH__RESULT__`` inside a ``<script>`` tag, so the data
    is pulled out with a regex rather than CSS/XPath selectors and then
    parsed with :func:`json.loads`.
    """

    name = '51job'
    allowed_domains = ['51job.com']
    start_urls = ['https://search.51job.com/list/000000,000000,0000,00,9,99,python,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=']

    # Compiled once at class-definition time. Raw string, escaped dots
    # (the original pattern's unescaped '.' matched any character), and
    # re.S so the JSON blob may legitimately contain newlines.
    _RESULT_RE = re.compile(
        r'window\.__SEARCH__RESULT__\s*=\s*(.*?)</script>', re.S)

    def start_requests(self):
        """Issue the initial search request for each configured URL.

        ``dont_filter=True`` bypasses scrapy's duplicate-request filter
        so the start URL is always fetched.
        """
        for url in self.start_urls:
            yield Request(url, dont_filter=True)

    def parse(self, response):
        """Extract the embedded JSON results and yield one dict per job.

        Yields each entry of the blob's ``engine_jds`` list unchanged.
        Logs a warning and yields nothing when the blob is missing
        (layout change / anti-bot page) or is not valid JSON, instead of
        crashing with IndexError as the original ``findall(...)[0]`` did.
        """
        match = self._RESULT_RE.search(response.text)
        if match is None:
            self.logger.warning(
                'No __SEARCH__RESULT__ blob found on %s', response.url)
            return
        try:
            data_dict = json.loads(match.group(1))
        except json.JSONDecodeError:
            self.logger.warning('Malformed JSON blob on %s', response.url)
            return
        # 'engine_jds' may be absent or null; treat both as "no jobs".
        for job in data_dict.get('engine_jds') or []:
            yield job
